# -*- coding: utf-8 -*-
import scrapy
from ..items import MaoyanItem


class Maoyan2Spider(scrapy.Spider):
    """Spider for the Maoyan Top-100 movie board (https://maoyan.com/board/4).

    Crawls all 10 pages of the board and yields one ``MaoyanItem`` per movie
    with ``name``, ``star`` (cast) and ``time`` (release date) fields.
    """

    name = 'maoyan2'
    allowed_domains = ['maoyan.com']
    # start_urls = ['http://maoyan.com/']

    # Override start_requests() to hand every paginated URL to the scheduler.
    def start_requests(self):
        # The board paginates via offset=0,10,...,90 (10 movies per page).
        for offset in range(0, 100, 10):
            url = 'https://maoyan.com/board/4?offset={}'.format(offset)
            yield scrapy.Request(
                url=url,
                callback=self.parse_html
            )

    def parse_html(self, response):
        """Parse one board page and yield a ``MaoyanItem`` per movie entry.

        Uses ``.get(default='')`` so a missing node produces an empty string;
        the previous ``extract_first().strip()`` raised ``AttributeError``
        (``NoneType`` has no ``strip``) whenever an xpath matched nothing.
        """
        item_info_list = response.xpath(
            '//dl[@class="board-wrapper"]/dd//div[@class="movie-item-info"]')
        for item_info in item_info_list:
            # One item object per movie entry.
            item = MaoyanItem()
            # .get() is the modern alias of extract_first(); default=''
            # keeps the chained .strip() safe when a field is absent.
            item['name'] = item_info.xpath(
                './p[@class="name"]/a/text()').get(default='').strip()
            item['star'] = item_info.xpath(
                './p[@class="star"]/text()').get(default='').strip()
            # The releasetime text carries a 5-character label prefix
            # ("上映时间：" -- "release time:"); slice it off to keep the date.
            item['time'] = item_info.xpath(
                './p[@class="releasetime"]/text()').get(default='').strip()[5:]

            # Hand the item to the pipeline; requires ITEM_PIPELINES in
            # settings.py (priority 1-1000, lower numbers run first).
            yield item

